In [1]:
from termcolor import colored
from sklearn.tree import DecisionTreeClassifier
import missingno as msno
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import pickle
import pprint
from sklearn.ensemble import RandomForestRegressor
from pandas_profiling import ProfileReport
from dateutil import relativedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from statsmodels.regression.linear_model import OLS

# Master switch for the plotting helpers below: when False they return
# immediately without drawing anything.
plot_______ = True

def new_line():
    """Print a dashed separator with an empty line before and after it."""
    print("", "-------------------------", "", sep="\n")

def RMSE(predictions, actual=None):
    """Return the root-mean-squared error, rounded to the nearest integer.

    Parameters
    ----------
    predictions : array-like
        Predicted target values.
    actual : array-like, optional
        True target values. When omitted, the module-level ``test_y`` is
        used, so existing one-argument calls keep working.
    """
    if actual is None:
        actual = test_y
    squared_errors = (actual - predictions) ** 2
    return round(np.sqrt(squared_errors.mean()))

def _mark_mean_and_median(values, vertical=False):
    """Draw labelled mean (red) and median (green) lines on the current axes."""
    draw_line = plt.axvline if vertical else plt.axhline
    draw_line(values.mean(), color='red', label='Mean')
    draw_line(values.median(), color='green', label='Median')
    # Every artist carries its own label, so the legend cannot be mismatched.
    plt.legend()


def plot_numerical_columns(col_name):
    """Show four diagnostic plots for the numeric column ``df[col_name]``.

    Draws a histogram, the values in row order, the values sorted ascending,
    and a box plot. Does nothing (returns None) when the module-level switch
    ``plot_______`` is False.
    """
    if not plot_______:
        return None

    values = df[col_name]

    # Histogram: the data values lie on the x-axis, so the mean and median
    # have to be vertical lines (horizontal lines would be compared against
    # the bin counts).
    values.plot(kind="hist", figsize=(13,8), label='Actual')
    plt.title(col_name, size=18)
    _mark_mean_and_median(values, vertical=True)
    plt.show()

    # Values against the row index
    values.plot(figsize=(13,8), label='Actual')
    plt.title(col_name, size=18)
    _mark_mean_and_median(values)
    plt.show()

    # Values sorted ascending, plotted against their rank
    values.sort_values().reset_index(drop=True).plot(figsize=(13,8), label='Actual')
    plt.title(col_name+" (SORTED)", size=18)
    _mark_mean_and_median(values)
    plt.show()

    # Box plot
    values.plot(kind="box", figsize=(13,8))
    plt.title(col_name, size=18)
    plt.xlabel("")
    plt.show()

def plot_date_columns(col_name):
    """Plot the date column ``df[col_name]``.

    Produces two line plots (row order, then sorted) followed by bar charts
    of the percentage of rows per year, month and day-of-month. Returns None
    without plotting when ``plot_______`` is False.
    """
    if not plot_______:
        return None

    dates = df[col_name]

    line_plots = (
        (lambda: dates, "Index", "Date"),
        (lambda: dates.sort_values().reset_index(drop=True), "Index (sorted)", "Year"),
    )
    for build_series, x_label, y_label in line_plots:
        build_series().plot(figsize=(15,7), grid=True)
        plt.xlabel(x_label, size=14)
        plt.ylabel(y_label, size=14)
        plt.title(col_name + " Graph", size=18)
        plt.show()

    frequency_plots = (
        ("year", "Year", " year Frequency Graph"),
        ("month", "Month", " month Frequency Graph"),
        ("day", "Day", " Day Frequency Graph"),
    )
    for part, x_label, title_suffix in frequency_plots:
        # Share of all rows (in percent) falling on each year / month / day
        share = getattr(dates.dt, part).value_counts().sort_index() / len(df) * 100
        share.plot(kind="bar", figsize=(15,7), grid=True)
        plt.xlabel(x_label, size=14)
        plt.ylabel("Ratio (1-100)", size=14)
        plt.title(col_name + title_suffix, size=18)
        plt.show()

def plot_catagorical_columns(cat_variable):
    """Bar-plot the percentage of rows per category of ``df[cat_variable]``.

    Returns None without plotting when ``plot_______`` is False.
    """
    if plot_______:
        share = df[cat_variable].value_counts() / len(df) * 100
        share.plot.bar(figsize=(15,6), grid=True)
        plt.title(cat_variable, size=18, color='r')
        plt.xlabel("Catagory", size=14, color='r')
        plt.ylabel("Ratio (1-100)", size=14, color='r')
        plt.show()
    return None

def data_shape():
    """Return a short multi-line description of the global ``df``'s row and column counts."""
    n_rows, n_cols = df.shape
    return f"The Data have:\n\t{n_rows} rows\n\t{n_cols} columns\n"
#===
# df = pd.read_csv("data.csv", date_parser=True)

# df = pd.read_csv("df_only_selected_columns_using_PCA.csv", date_parser=True)
# target_variable = "ACTUAL_WORTH"
# df = pd.concat([
#         df.select_dtypes("number").iloc[:, :3],
#         df.select_dtypes("O").iloc[:, :3],
#         df.select_dtypes(exclude=["number", "O"]),
#         df[[target_variable]]], 1)
# target_variable = "AREA_NAME_EN"

# df = pd.read_csv("cleaned_data.csv", date_parser=True)
# target_variable = "SalePrice"

# Load the train and test CSV files (hard-coded local paths).
train = pd.read_csv("/home/amir/Downloads/train.csv")
test  = pd.read_csv("/home/amir/Downloads/test.csv")
target_variable = "SalePrice"
# Set the target aside so train and test are stacked on their shared columns.
train_y = train[target_variable]
train = train.drop(columns=target_variable)
df = pd.concat([train, test])
# Re-attach the target: known values for the train rows, missing for the test rows.
df[target_variable] = train_y.to_list() + [None]*len(test)
#===
# Report the data size, then how many columns there are of each dtype.
new_line()
print(data_shape())
#===
new_line()
dtype_counts = df.dtypes.value_counts()
print(f"Columns types distribution:\n\n{dtype_counts}\n")
dtype_counts.plot(kind='barh', figsize=(10, 2), grid=True, title="Variable types Count Graph")
plt.xlabel("Count")
plt.show()
#===
# Rows without a target value cannot be used for modeling, so drop them.
missing_target = df[target_variable].isna()
if missing_target.any():
    new_line()
    to_print = f"There are {missing_target.sum()} NAs in target values, we droped those rows"
    print(colored(to_print, 'red'))
    df = df[~missing_target]
del missing_target
#---------------------------------------------------
# df.select_dtypes("O").columns[:5]
# D = df.select_dtypes(exclude="O")
# D2 = df.select_dtypes("O").iloc[:,:5]
# df = pd.concat([D, D2], 1)

# profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)
# profile.to_file("your_report.html")
#---------------------------------------- NA
# `a` holds the NA count of every column that has at least one NA.
a = df.isna().sum().where(lambda x:x>0).dropna()
if a.size:
    n_na_columns = len(a)
    n_columns = df.shape[1]
    new_line()
    to_print = f"There are {n_na_columns} (out of {n_columns}, [{round(n_na_columns/n_columns*100)}%]) columns that contains 1 or more NA."
    print(colored(to_print, 'red'))

    # Keep the missingness pattern as an explicit categorical feature.
    indicator_labels = {True : "Missing", False : "Not missing"}
    for i in a.index:
        df[i+"_NA_indicator"] = df[i].isna().replace(indicator_labels)
    new_line()
    to_print = f"{a.size} NA_indicator variables added to the data\n"
    print(colored(to_print, 'red'))

    print("========= NA Graphs =========\n")
    msno.matrix(df)
    plt.title("NA Graph")
    plt.show()

    new_line()
    sns.heatmap(df.isnull(), cbar=False)
    plt.title("NA Graph")
    plt.show()
#===
# Turn the NA counts into percentages and drop columns that are 100% missing.
a = a.sort_values() / len(df) * 100
entirely_missing = a[a == 100].index
if entirely_missing.size:
    new_line()
    df.drop(columns=entirely_missing, inplace=True)
    # data_shape() is evaluated after the drop, so it reports the new size.
    to_print = f"There are {entirely_missing.size} columns that are all Missing values, so we droped those.\nNow {data_shape()}\n\nDropped columns names:"
    print(colored(to_print, 'red'))
    for i in entirely_missing:
        print("\t", i)
    a = a[a != 100]
#===
# dtype breakdown of the columns that still contain missing values
na_dtype_counts = df[a.index].dtypes.value_counts()
if na_dtype_counts.size:
    new_line()
    print(f"NA columns data type Distribution:\n\n{na_dtype_counts}")
del na_dtype_counts
#===
new_line()
if not a.size:
    print(colored("Now There is no NaN value in our Data", 'red'))
else:
    print(f"NaN Ratio (0-100)\n\n{a}")
#===
# ----------------------------------------------- Imputing Missing values
# ------------------------------------ Numerical columns imputing
if df.select_dtypes("number").isna().sum().sum():

    def _missing_value_breakdown():
        """Summarise the NA counts of the current global ``df`` by column kind."""
        total = df.isna().sum().sum()
        in_categorical = df.select_dtypes("O").isna().sum().sum()
        in_numerical = df.select_dtypes("number").isna().sum().sum()
        in_others = df.select_dtypes(exclude=["O", "number"]).isna().sum().sum()
        return f'There are {total} Missing values:\n\t{in_categorical} in catagorical variables\n\t{in_numerical} in numerical columns\n\t{in_others} in others'

    new_line()
    print('(Before Missing values treatment)\n' + _missing_value_breakdown())
    from sklearn.impute import KNNImputer

    # Split by kind; only the numeric part goes through the KNN imputer.
    non_numeric_part = df.select_dtypes(exclude="number")
    numeric_part = df.select_dtypes("number")
    del df
    knn = KNNImputer(n_neighbors=4, weights="uniform")
    numeric_part = pd.DataFrame(knn.fit_transform(numeric_part), columns=numeric_part.columns)
    # The imputer returns a bare array, so both halves are re-indexed from 0
    # before being glued back together side by side.
    df = pd.concat(
        [non_numeric_part.reset_index(drop=True), numeric_part.reset_index(drop=True)],
        axis=1,
    )
    del non_numeric_part
    del numeric_part
    print('\n(After filling numeric missing values)\n' + _missing_value_breakdown())
#===
# -------------------------------- Catagoriacal variables imputating
# Fill missing categorical values with a decision tree trained on the columns
# that are already complete. Columns are processed from the least to the most
# incomplete, so each filled column can serve as a predictor for the next.
vars_to_fill = df.select_dtypes("O").isna().mean().where(lambda x:x>0).dropna().sort_values(ascending=True)
if vars_to_fill.size:
    for col in vars_to_fill.index:
        # Predictors: every column without a missing value right now
        # (`col` itself has NAs, so it is never part of them).
        complete = df.loc[:, df.isna().sum() == 0]
        predictors = pd.concat(
            [
                complete.select_dtypes("number"),
                pd.get_dummies(complete.select_dtypes(exclude="number"), prefix_sep="__"),
            ],
            axis=1,  # keyword form: pandas 2 no longer accepts a positional axis
        )

        # Plain boolean arrays avoid any index alignment surprises.
        known = df[col].notna().to_numpy()
        clf = DecisionTreeClassifier().fit(predictors[known], df.loc[known, col])
        df.loc[~known, col] = clf.predict(predictors[~known])
    new_line()
    print(f"Missing values imputed, Now there are {df.isna().sum().sum()} Missing values")
# ----------------------------------------------- END Imputing Missing values
# --------------------------------------------------------- Unique values
# A column with a single distinct value carries no information: drop it.
unique_counts = df.nunique()
constant_columns = unique_counts[unique_counts == 1].index
if constant_columns.size:
    new_line()
    df.drop(columns=constant_columns, inplace=True)
    if constant_columns.size == 1:
        last_ = ("", "it")
    else:
        last_ = ("s", "those")
    to_print = f"There are {constant_columns.size} variable{last_[0]} That have only one unique value, so we droped {last_[1]}.\nDropped column{last_[0]} name{last_[0]} (in order):"
    print(colored(to_print, 'red'))
    for i in constant_columns.sort_values():
        print(i)
    new_line()
    print(f"\nNow {data_shape()}")
del unique_counts
del constant_columns
# #===
# Columns in which no value repeats (IDs and the like) are dropped.
# The target is always kept: a continuous target can easily consist of
# distinct values only, and dropping it would break the modeling step.
is_all_unique = df.apply(lambda x:x.is_unique)
all_values_are_unique = is_all_unique[is_all_unique].index.drop(target_variable, errors="ignore")
del is_all_unique
if all_values_are_unique.size:
    new_line()
    df.drop(columns=all_values_are_unique, inplace=True)
    last_ = ("", "it") if all_values_are_unique.size == 1 else ("s", "those")
    to_print = f"There are {all_values_are_unique.size} column{last_[0]} that have all unique values, so no value repeatation, we droped {last_[1]} column{last_[0]}.\nDropped column{last_[0]} name{last_[0]} are:\n"
    print(colored(to_print, 'red'))
    for i in all_values_are_unique:
        print("\t", i)
    new_line()
    print(f"Now {data_shape()}")
del all_values_are_unique
#===
date_columns = []
def DTYPES():
    """Classify the columns of the global ``df`` as Object, Number or Date.

    Every object column is tentatively parsed with ``pd.to_datetime``; the
    ones that parse are converted in place in ``df`` and recorded in the
    module-level ``date_columns`` (an empty list when there are none,
    otherwise a ``pd.Index``).

    Returns
    -------
    pandas.DataFrame
        One row per classified column, with columns ``Column`` and ``dtype``
        (``'Object'``, ``'Number'`` or ``'Date'``).
    """
    global date_columns
    catagorical_columns = df.head().select_dtypes("O").columns
    numerical_columns   = df.head().select_dtypes("number").columns

    parsed_as_date = []
    for i in catagorical_columns:
        try:
            df[i] = pd.to_datetime(df[i])
        except (ValueError, TypeError, OverflowError):
            # Not date-like: leave it as an ordinary object column.
            continue
        parsed_as_date.append(i)

    catagorical_columns = catagorical_columns.drop(parsed_as_date)
    date_columns = pd.Index(parsed_as_date) if parsed_as_date else []

    classified = catagorical_columns.append(numerical_columns).append(date_columns)
    dtypes = pd.DataFrame({
        "Column" : classified,
        "dtype" : ['Object']*len(catagorical_columns) + ['Number']*len(numerical_columns) + ['Date']*len(date_columns),
    })
    #===
    if not classified.is_unique:
        new_line()
        print(colored("Some column/s repated in > 1 dtypes\n", 'red'))
        repeated_names = dtypes.Column[dtypes.Column.duplicated()]
        print(dtypes[dtypes.Column.isin(repeated_names)].to_string())
    #===
    x = df.columns.difference(classified)
    if x.size:
        new_line()
        print(colored("Some columns not included in any existing catagory, those:\n", 'red'))
        for i in x:
            print(f"\t<{i}>, with dtype of <{df[i].dtype}>")
    #===
    return dtypes
#===
# Classify every column as Object / Number / Date; DTYPES() also converts
# date-like object columns of `df` to datetime and fills `date_columns`.
dtypes = DTYPES()
# ----------------------------------------------------------------------- Feature enginearing
# ======= Adding date columns
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> add polynomial, sqrt, tree, log features
def add_new_date_cols(x, suffix):
    """Derive calendar features from a datetime column and add them to ``df``.

    Parameters
    ----------
    x : pandas.Series
        Datetime column taken from the global ``df``.
    suffix : str
        Text put in front of every generated column name.

    Returns
    -------
    pandas.DataFrame
        The global ``df`` (which now also holds the new columns) without the
        original column ``x.name``. Derived features with fewer than two
        distinct values are not added.
    """
    def quoted(values):
        # Whole numbers rendered as quoted text (e.g. '"2010"') so they stay
        # categorical downstream; missing entries remain NaN.
        return '"' + values.apply(lambda v: np.nan if pd.isna(v) else str(int(v))) + '"'

    # `Series.dt.week` was removed in pandas 2; isocalendar() is its
    # replacement. Casting to float turns <NA> into NaN.
    week = x.dt.isocalendar().week.astype("float64")

    d = {}
    d[suffix + '_week_normalized'] = week / 52
    d[suffix + '_week_str'] = quoted(week)

    d[suffix + '_year_after_min_year'] = x.dt.year - x.dt.year.min()
    d[suffix + '_year_str'] = quoted(x.dt.year)

    d[suffix + '_day_name']  = x.dt.day_name()

    # Whole days elapsed since the earliest date, as quoted text
    d[suffix + '_day_after_min_date_str']  = '"' + (x - x.min()).apply(lambda delta: str(delta).split()[0]) + '"'

    d[suffix + '_day_normalized'] = x.dt.day / 31

    d[suffix + '_hour_normalized'] = x.dt.hour / 24
    d[suffix + '_hour_str'] = quoted(x.dt.hour)

    d[suffix + '_month_name'] = x.dt.month_name()
    d[suffix + '_month_normalized'] = x.dt.month/12

    for k, v in d.items():
        if v.nunique() > 1:
            df[k] = v
    return df.drop(columns=x.name)
    # return df

# Replace every date column by its derived calendar features.
n_columns_before = df.shape[1]
for date_col in date_columns:
    df = add_new_date_cols(df[date_col], date_col)
n_date_features = df.shape[1] - n_columns_before
if n_date_features > 0:
    new_line()
    to_print = f"Added {n_date_features} date Features"
    print(colored(to_print, 'red'))

# ======= numeric columns whose distinct values are fewer than 4% of the rows also get a quoted-text (categorical) copy
unique_ratio = df.select_dtypes("number").nunique() / len(df) * 100
f = unique_ratio.where(lambda x:x<4).dropna().index
if f.size:
    n_columns_before = df.shape[1]
    for col_num_to_str in f:
        as_text = df[col_num_to_str].astype(str)
        df[col_num_to_str+"_str"] = '"' + as_text + '"'
    n_string_features = df.shape[1] - n_columns_before
    new_line()
    to_print = f"Added {n_string_features} String Features (Extracted from numerical variables)"
    print(colored(to_print, 'red'))
# =======
def cluping_rare_cases_in_one_catagory(x):
    """Merge the infrequent categories of column ``x`` into ``"Rare cases"``.

    Parameters
    ----------
    x : str
        Name of a column of the global ``df``.

    Returns
    -------
    pandas.Series or None
        The clumped column. When only one distinct value is left, the column
        is dropped from ``df`` and None is returned.
    """
    global df
    column = df[x]

    # Every category seen fewer than 10 times is merged into one bucket.
    counts = column.value_counts()
    rare_values = counts[counts < 10].index
    column = column.mask(column.isin(rare_values), "Rare cases")

    # If even the merged bucket holds fewer than 8 rows, replace those rows
    # with the most common value instead of keeping a tiny category.
    # (All other categories have at least 10 rows, so this is the same test
    # as "the smallest category has fewer than 8 rows", but it also works
    # for a column without any non-missing value.)
    is_rare = column == "Rare cases"
    if 0 < is_rare.sum() < 8:
        column[is_rare] = column.mode()[0]

    if column.nunique() == 1:
        new_line()
        to_print = f"The column <{column.name}> have only one unique value, We droped it from the data."
        print(colored(to_print, 'red'))
        df.drop(columns=column.name, inplace=True)
        return None
    return column

# Clump the rare categories of every object column; the helper returns None
# when it dropped the column itself.
for var in df.select_dtypes("O").columns:
    m = cluping_rare_cases_in_one_catagory(var)
    if isinstance(m, pd.Series):
        df[var] = m
new_line()


# How many values ended up in the "Rare cases" bucket, per column
rare_case_counts = (df == 'Rare cases').sum().sort_values()
xx = rare_case_counts.where(lambda x:x>0).dropna()
xx = pd.DataFrame({"Count" : xx,
                "Ratio" : round(xx/len(df)*100, 4)})
print(f"<Rare case> catagory:\n{xx.to_string()}")
# ----------------------------------------------------------------------- END (Feature enginearing)
# Columns were added and removed above, so classify them again.
dtypes = DTYPES()
# ---------------------------------------------------- Correlation plot
new_line()
# Absolute pairwise correlations of the numeric columns (reused further below)
cor_df = df.select_dtypes('number').corr().abs()
# Hide the upper triangle and the diagonal: they only mirror the lower part.
mask = np.triu(np.ones_like(cor_df, dtype=bool))
f, ax = plt.subplots(figsize=(17, 10))
cmap = sns.color_palette("viridis", as_cmap=True)
plot_ = sns.heatmap(
    cor_df,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)
plot_.set_title("abs (Correlation) plot", fontsize=25)
plt.show()
# ---------------------------------------------------------------------
#===
# m = 0
# Per-column report: prints statistics and plots for every classified column
# and drops columns that are empty, constant, or (for Object/Date columns)
# made of unique values only.
# Distinct-value counts are computed once; dropping other columns inside the
# loop does not change the counts of the columns that remain.
column_nunique = df.nunique()
for _, (column_name, type_) in dtypes.iterrows():
    x = df[column_name]
    to_print = f"\n\n\n========================================= {column_name} =========================================\n\n"
    print(colored(to_print, 'red'))

    # Report columns that map one-to-one onto this one (same information).
    for col_ in df.columns:
        if col_ == column_name or column_nunique[col_] != column_nunique[column_name]:
            continue
        unique_combination = df[[col_, column_name]].drop_duplicates()
        if unique_combination.apply(lambda s: s.is_unique).all():
            new_line()
            to_print = f"This Columns is duplicate of <{col_}> column"
            print(colored(to_print, 'red'))

    print(f"Column Type     : ", end="")
    print(colored(type_, 'red'))
    if x.isna().all():
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(colored("We dropped This column, because it is all Empty", 'red'))
        continue
    # The labels come from DTYPES(): 'Object', 'Number' or 'Date'.
    if type_ in ["Object", "Date"] and x.is_unique:
        new_line()
        df.drop(columns=column_name, inplace=True)
        to_print = f"We dropped This column, because it's a {type_} columns, and it's all values are unique"
        print(colored(to_print, 'red'))
        continue
    if x.nunique() == 1:
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(colored("We dropped This column, because There is only one unique value", 'red'))
        continue

    if type_ == "Number":
        # Correlations with all other numeric columns, weakest first
        local_cor = cor_df[column_name].drop(column_name).reset_index()
        local_cor = local_cor.reindex(local_cor[column_name].abs().sort_values().index)
        if local_cor[column_name].max() == 1:
            new_line()
            twin = local_cor[local_cor[column_name] == 1]['index'].values[0]
            to_print = f"This column is perfactly correlated with column <{twin}>, so remove one of them"
            print(colored(to_print, 'red'))

        new_line()
        xm = local_cor[-3:].rename(columns={'index' : 'Column name', column_name : 'Correlation'}).reset_index(drop=True)
        xm.index = xm['Column name']
        xm.drop(columns="Column name", inplace=True)
        xm.plot(kind='barh', grid=True, figsize=(10,1.5))
        plt.title("Most 3 correlated features with this columns (sorted)", size=14)
        plt.xlabel("Correlation", size=12)
        plt.show()

        new_line()
        skewness = x.skew(skipna = True)
        if abs(skewness) < 0.5:
            print(f"The data is fairly symmetrical (skewness is: {skewness})")
        elif abs(skewness) < 1:
            print(f"The data are moderately skewed (skewness is: {skewness})")
        else:
            to_print = f"The data are highly skewed (skewness is: {skewness})\nNote: When skewness exceed |1| we called it highly skewed"
            print(colored(to_print, 'red'))

        # Summary table; "Outlies" counts values more than 3 standard
        # deviations away from the mean.
        ff = [x.count(), x.isna().sum(), x.mean(), x.std(), x.min()]
        ff += x.quantile([.25,.5,.75]).to_list()
        ff += [x.max(), x.nunique(), (((x - x.mean())/x.std()).abs() > 3).sum(), (x < 0).sum(), (x == 0).sum()]

        f = pd.DataFrame(ff, index=['Count', 'NA', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max', 'Nunique', 'Outlies', 'Nagetive', 'Zeros'], columns=['Count'])
        f['Ratio'] = f.Count / x.count() * 100
        # A percentage is meaningless for the location/spread statistics.
        f.loc['Mean' : 'Max', 'Ratio'] = None

        new_line()
        print(f.round(2).to_string())
        plot_numerical_columns(column_name)

    elif type_ == "Object":
        value_counts = x.value_counts()
        counts = (
            x.count(),
            x.nunique(),
            len(x),
            x.isna().sum(),
            (x == x.mode().values[0]).sum(),
            (x == value_counts.tail(1).index[0]).sum(),
            value_counts.where(lambda c: c == 1).dropna().size,
        )
        f = pd.DataFrame(counts, index=['Count', 'Nunique', 'Len', 'NA', 'Most frequent', 'Least frequent', 'Values occured only once'], columns=['Counts'])
        f['Ratio'] = (f.Counts / x.count() * 100).round(4)
        f.loc[['Len'], 'Ratio'] = None

        new_line()
        print(f.to_string())

        # Values that differ only by letter case or surrounding blanks
        if x.str.lower().nunique() != x.nunique():
            new_line()
            to_print = f"Case issue\n\tin orignal variable There are {x.nunique()} unique values\n\tin lower verstion there are   {x.str.lower().nunique()} unique values.\n"
            print(colored(to_print, 'red'))

        if x.str.strip().nunique() != x.nunique():
            new_line()
            to_print = f"Space issue\n\tin orignal variable There are {x.nunique()} unique values\n\tin striped verstion there are {x.str.strip().nunique()} unique values."
            print(colored(to_print, 'red'))

        plot_catagorical_columns(column_name)

    elif type_ == "Date":
        new_line()
        rd = relativedelta.relativedelta( pd.to_datetime(x.max()), pd.to_datetime(x.min()))
        to_print = f"Diffrenece between first and last date:\n\tYears : {rd.years}\n\tMonths: {rd.months}\n\tDays  : {rd.days}"
        print(colored(to_print, 'red'))

        value_counts = x.value_counts()
        ff = (
            x.count(),
            x.nunique(),
            (x == x.mode().values[0]).sum(),
            (x == value_counts.tail(1).index[0]).sum(),
            value_counts.where(lambda c: c == 1).dropna().size,
        )
        f = pd.DataFrame(ff, index=["Count", 'Nunique', 'Most frequent values', 'Least frequent values', 'Values occured only once count'], columns=['Counts'])
        f['Ratio'] = (f.Counts / x.count() * 100).round(4)

        new_line()
        print(f"\n{f.to_string()}")

        # Gaps inside the observed range of years / months / days
        for unit_label, unit in (("Years", "year"), ("Months", "month"), ("Days", "day")):
            observed = getattr(x.dt, unit).dropna().astype(int)
            expected = range(observed.min(), observed.max() + 1)
            missing = sorted(set(expected).difference(observed.unique()))
            if missing:
                new_line()
                print(colored(f"These {unit_label} (in order) are missing:\n", 'red'))
                for i in missing:
                    print("\t", i, end=", ")

        new_line()
        plot_date_columns(column_name)


# ================================================================================================================ Modeling
print("\n\n")
print("----------------------------------------------------------------------------------------------")
print("****************************************** Modeling ******************************************")

# Regression problem: any (non-boolean) numeric target. Testing the dtype
# through pandas is independent of the platform's default integer width.
if pd.api.types.is_numeric_dtype(df[target_variable]) and not pd.api.types.is_bool_dtype(df[target_variable]):

    print("\n-------------------- This is Regression problem --------------------\n")
    print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

    # Numeric columns as they are plus one-hot encoded categoricals. The
    # dummies are created as floats because statsmodels cannot fit a frame
    # that mixes float and bool columns.
    df = pd.concat(
        [
            df.select_dtypes("number"),
            pd.get_dummies(df.select_dtypes(exclude="number"), prefix_sep="__", dtype=float),
        ],
        axis=1,
    )
    # ====
    train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
    # ====
    # --------------------------------------------------------- Linear regression
    to_print = "\n ------------------------------------- Linear Regression -------------------------------------\n"
    print(colored(to_print, 'red'))

    # NOTE: no constant column is added, so the model is fitted without an intercept.
    model_reg = OLS(train_y, train_X).fit()

    # Turn the coefficient table of the statsmodels summary into a DataFrame.
    summary = model_reg.summary()
    summary_df = pd.DataFrame(summary.tables[1])
    summary_df.columns = summary_df.iloc[0]
    summary_df.drop(0, inplace=True)
    summary_df.columns = summary_df.columns.astype(str)
    summary_df.columns = ["Variable"] + summary_df.columns[1:].to_list()
    for i in summary_df.columns[1:]:
        summary_df[i] = summary_df[i].astype(str).astype(float)
    summary_df.Variable = summary_df.Variable.astype(str)
    # Usual significance markers derived from the p-value
    summary_df['Indicator'] = summary_df['P>|t|'].apply(lambda x:"***" if x < 0.001 else "**" if x < 0.01 else "*" if x < 0.05 else "." if x < 0.1  else "")
    summary_df = summary_df.sort_values("Variable").reset_index(drop=True)
    # Write the file that the message below announces.
    summary_df.to_csv("summary_OLS_1.csv", index=False)
    new_line()
    print(colored("NOTE: This summary saved as <summary_OLS_1.csv>", 'red'))

    new_line()
    print(summary_df.to_string())
    # ============================= Model statistic
    predictions = model_reg.predict(test_X)

    new_line()
    print(colored(" --- Model statistic --- \n", 'red'))
    print(f"R-squared         : {round(model_reg.rsquared, 3)}")
    print(f"Adj. R-squared    : {round(model_reg.rsquared_adj, 3)}")
    print(f"F-statistic       : {round(model_reg.fvalue)}")
    print(f"Prob (F-statistic): {model_reg.f_pvalue}")
    print(f"No. Observations  : {round(model_reg.nobs)}")
    print(f"AIC               : {round(model_reg.aic)}")
    print(f"Df Residuals      : {round(model_reg.df_resid)}")
    print(f"BIC               : {round(model_reg.bic)}")
    print(f"RMSE (test)       : {RMSE(predictions)}")
    # ======
    # Residuals should not be correlated with any predictor; report the worst case.
    f = train_X.copy("deep")
    f['Errors__'] = model_reg.resid
    f = f.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {f.values[0]}, with columns <{f.index[0]}>")
    print(f"Mean of train reseduals: {model_reg.resid.mean()}")
    del f
    # ============================= END (Model statistic)
    # --------------------------------------------------------- END Linear regression




    # --------------------------------------------------------- Random Forest
    print("\n ------------------------------------- Random Forest -------------------------------------\n")

    rf = RandomForestRegressor(n_estimators = 200, oob_score=True)
    model_rf = rf.fit(train_X, train_y)
    predictions_rf = rf.predict(test_X)

    new_line()
    print(colored("RF model peramters:\n", 'red'))
    pprint.pprint(model_rf.get_params())

    new_line()
    # Plot the (at most) 30 most important features.
    featuresImportance = pd.Series(model_rf.feature_importances_, index=train_X.columns).sort_values(ascending=False)
    if len(featuresImportance) > 30:
        featuresImportance = featuresImportance.head(30)
    featuresImportance.plot(figsize=(20,10), kind='bar', grid=True)
    plt.title("RandomForest Feature importances Graph", size=18,color='red')
    plt.xlabel("Features", size=14, color='red')
    plt.ylabel("Importance", size=14, color='red')
    plt.show()
    del featuresImportance

    new_line()
    print(colored("--- Model statistic ---", 'red'))
    # score() returns the coefficient of determination R^2 of the prediction.
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
    print(f"R^2 (test) : {rf.score(test_X, test_y)}")
    print(f"R^2 (train): {rf.score(train_X, train_y)}")
    print(f"RMSE (test): {RMSE(predictions_rf)}")
    print(f"oob score  : {model_rf.oob_score_}")

    f = test_X.copy("deep")
    errors_rf = predictions_rf - test_y
    f['Errors__'] = errors_rf
    f = f.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {f.values[0]}, with columns <{f.index[0]}>")
    # --------------------------------------------------------- END Random Forest
elif df[target_variable].dtype == "O":
    # Classififcation problem
    if df[target_variable].nunique() == 2:
        print("\n-------------------- This is Binary Classification problem --------------------\n")
        print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")
        df = pd.concat([
                        df.select_dtypes(exclude = "O"),
                        pd.get_dummies(df.drop(columns=target_variable).select_dtypes("O")),
                        df[[target_variable]]
                        ], 1)

        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
        clf = LogisticRegression().fit(train_X, train_y)
        predictions = clf.predict_proba(test_X)
        predictions = pd.Series(predictions[:, 0])
        lst = []
        for thresh in np.linspace(predictions.min(), predictions.max(), 50)[1:]:
            pred = predictions < thresh

            pred.loc[pred == True] = clf.classes_[0]
            pred.loc[pred == False] = clf.classes_[1]

            test_y = test_y.reset_index(drop=True)

            TN = ((pred == clf.classes_[0]) & (test_y == clf.classes_[0])).sum()
            TP = ((pred == clf.classes_[1]) & (test_y == clf.classes_[1])).sum()
            FN = ((pred == clf.classes_[0]) & (test_y == clf.classes_[1])).sum()
            FP = ((pred == clf.classes_[1]) & (test_y == clf.classes_[0])).sum()

            p = TP / (TP + FP)
            r = TP / (TP + FN)
            f = 2 * ((p * r) / (p+r))

            lst.append((thresh, (pred == test_y).mean(), p, r , f))

        d = pd.DataFrame(lst, columns=["Thresold", "Accurecy(0-1)", "Precision", "Recall", "F1"])
        d = d.set_index("Thresold")
        d.plot(grid=True, figsize=(18,7));
        plt.title("Model performance at diffrent Thresolds", size=18, color='red');
        plt.xlabel("Thresold", size=14, color='red');
        plt.ylabel("");
        plt.show()
    else:
        # Multiclass branch: label-encode the categorical columns, fit a random
        # forest on a random train/test split, then report feature importances,
        # summary scores and per-class evaluation plots.
        to_print = "\n-------------------- This is Multiclass Classification problem --------------------\n"
        print(colored(to_print, 'red'))
        print("'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

        # Encode every object column (the target too, when it is one) as
        # stringified integer codes.  The encoded Series reuses the column's own
        # index so that assigning back through df.loc aligns row-for-row even if
        # `df` does not carry a default 0..n-1 index.
        df.loc[:, df.select_dtypes("O").columns] = df.select_dtypes("O").apply(
            lambda x: pd.Series(LabelEncoder().fit_transform(x.astype(str)), index=x.index).astype(str)
        )
        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])

        clf = RandomForestClassifier(n_estimators=1000).fit(train_X, train_y)
        predictions = clf.predict(test_X)

        # Show at most the 30 most important features (head is a no-op below 30).
        feature_imp = pd.Series(clf.feature_importances_, index=train_X.columns).sort_values(ascending=False)
        feature_imp = feature_imp.head(30)
        feature_imp.plot(kind='barh', figsize=(17,10), grid=True)
        plt.title("Feature importances Graph", size=18, color='red')
        plt.xlabel("Importance", size=14, color='red')
        plt.ylabel("Feature", size=14, color='red')
        plt.show()
        # ====
        f = (test_y, predictions)

        print(f"accuracy_score: {metrics.accuracy_score(*f)}")
        # The default average='binary' raises on multiclass targets; use the
        # support-weighted mean of the per-class F1 scores instead.
        print(f"f1_score: {metrics.f1_score(*f, average='weighted')}")

        # One-vs-rest inputs for the ROC and precision/recall plots: for each
        # class, a 0/1 indicator of the true label and the predicted probability
        # of that class.  Classes that are absent from (or are the whole of) the
        # test split are skipped because their curves are undefined.
        proba = clf.predict_proba(test_X)
        one_vs_rest = []
        for i, cls in enumerate(clf.classes_):
            is_cls = (test_y == cls).to_numpy().astype(int)
            if 0 < is_cls.sum() < is_cls.size:
                one_vs_rest.append((cls, is_cls, proba[:, i]))

        # ROC: one curve per class, drawn with the plain metric functions since
        # the plot_roc_curve helper only accepts binary classifiers.
        plt.figure()
        for cls, is_cls, score in one_vs_rest:
            fpr, tpr, _ = metrics.roc_curve(is_cls, score)
            plt.plot(fpr, tpr, label=f"{cls} (AUC = {metrics.auc(fpr, tpr):.2f})")
        plt.plot([0, 1], [0, 1], linestyle="--", color="grey")  # chance level
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.legend()
        plt.title("ROC curve plot")
        plt.show()

        # Confusion matrix: the display object must be .plot()-ed to draw anything.
        cm = metrics.confusion_matrix(test_y, predictions, labels=clf.classes_)
        metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_).plot()
        plt.title("Confusion matrix")
        plt.show()

        # Precision/recall: one curve per class, same one-vs-rest scheme as ROC.
        plt.figure()
        for cls, is_cls, score in one_vs_rest:
            precision, recall, _ = metrics.precision_recall_curve(is_cls, score)
            plt.plot(recall, precision, label=str(cls))
        plt.xlabel("Recall")
        plt.ylabel("Precision")
        plt.legend()
        plt.title("Precision recall curve")
        plt.show()
# ================================================================================================================ END Modeling
-------------------------

The Data have:
	2919 rows
	81 columns


-------------------------

Columns types distribution:

object     43
int64      26
float64    12
dtype: int64

-------------------------

There are 1459 NAs in target values, we droped those rows

-------------------------

There are 19 (out of 81, [23%]) columns that contains 1 or more NA.

-------------------------

19 NA_indicator variables added to the data

========= NA Graphs =========

-------------------------

-------------------------

NA columns data type Distribution:

object     16
float64     3
dtype: int64

-------------------------

NaN Ratio (0-100)

Electrical       0.068493
MasVnrType       0.547945
MasVnrArea       0.547945
BsmtQual         2.534247
BsmtCond         2.534247
BsmtFinType1     2.534247
BsmtExposure     2.602740
BsmtFinType2     2.602740
GarageCond       5.547945
GarageQual       5.547945
GarageFinish     5.547945
GarageType       5.547945
GarageYrBlt      5.547945
LotFrontage     17.739726
FireplaceQu     47.260274
Fence           80.753425
Alley           93.767123
MiscFeature     96.301370
PoolQC          99.520548
dtype: float64

-------------------------

(Before Missing values treatment)
There are 6965 Missing values:
	6617 in catagorical variables
	348 in numerical columns
	0.0 in others

(After filling numeric missing values)
There are 6617 Missing values:
	6617 in catagorical variables
	0 in numerical columns
	0.0 in others

-------------------------

Missing values imputed, Now there are 0 Missing values

-------------------------

There are 1 column that have all unique values, so no value repeatation, we droped it column.
Dropped column name are:

	 Id

-------------------------

Now The Data have:
	1460 rows
	99 columns


-------------------------

Added 18 String Features (Extracted from numerical variables)

-------------------------

The column <Street> have only one unique value, We droped it from the data.

-------------------------

The column <Utilities> have only one unique value, We droped it from the data.

-------------------------

The column <Electrical_NA_indicator> have only one unique value, We droped it from the data.

-------------------------

The column <PoolQC_NA_indicator> have only one unique value, We droped it from the data.

-------------------------

The column <PoolArea_str> have only one unique value, We droped it from the data.

-------------------------

<Rare case> catagory:
                         Count   Ratio
HouseStyle                 8.0  0.5479
MasVnrType_NA_indicator    8.0  0.5479
MasVnrArea_NA_indicator    8.0  0.5479
FullBath_str               9.0  0.6164
Foundation                 9.0  0.6164
RoofStyle                  9.0  0.6164
MiscFeature               10.0  0.6849
Neighborhood              11.0  0.7534
Heating                   14.0  0.9589
BedroomAbvGr_str          14.0  0.9589
Condition1                15.0  1.0274
Condition2                15.0  1.0274
RoofMatl                  15.0  1.0274
Exterior2nd               17.0  1.1644
3SsnPorch_str             24.0  1.6438
LowQualFinSF_str          26.0  1.7808
SaleType                  28.0  1.9178
MiscVal_str               41.0  2.8082

-------------------------




========================================= MSZoning =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1151   78.8356
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= Alley =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent                835   57.1918
Least frequent               625   42.8082
Values occured only once       0    0.0000



========================================= LotShape =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                925   63.3562
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= LandContour =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1311   89.7945
Least frequent                36    2.4658
Values occured only once       0    0.0000



========================================= LotConfig =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1056   72.3288
Least frequent                47    3.2192
Values occured only once       0    0.0000



========================================= LandSlope =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1382   94.6575
Least frequent                13    0.8904
Values occured only once       0    0.0000



========================================= Neighborhood =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       24    1.6438
Len                         1460       NaN
NA                             0    0.0000
Most frequent                225   15.4110
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= Condition1 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1260   86.3014
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= Condition2 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1445   98.9726
Least frequent                15    1.0274
Values occured only once       0    0.0000



========================================= BldgType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1220   83.5616
Least frequent                31    2.1233
Values occured only once       0    0.0000



========================================= HouseStyle =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        8    0.5479
Len                         1460       NaN
NA                             0    0.0000
Most frequent                726   49.7260
Least frequent                 8    0.5479
Values occured only once       0    0.0000



========================================= RoofStyle =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1141   78.1507
Least frequent                 9    0.6164
Values occured only once       0    0.0000



========================================= RoofMatl =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1434   98.2192
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= Exterior1st =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       10    0.6849
Len                         1460       NaN
NA                             0    0.0000
Most frequent                522   35.7534
Least frequent                20    1.3699
Values occured only once       0    0.0000



========================================= Exterior2nd =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       12    0.8219
Len                         1460       NaN
NA                             0    0.0000
Most frequent                504   34.5205
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= MasVnrType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                864   59.1781
Least frequent                16    1.0959
Values occured only once       0    0.0000



========================================= ExterQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                906   62.0548
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= ExterCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1286   88.0822
Least frequent                28    1.9178
Values occured only once       0    0.0000



========================================= Foundation =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                647   44.3151
Least frequent                 9    0.6164
Values occured only once       0    0.0000



========================================= BsmtQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                663   45.4110
Least frequent                52    3.5616
Values occured only once       0    0.0000



========================================= BsmtCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1340   91.7808
Least frequent                54    3.6986
Values occured only once       0    0.0000



========================================= BsmtExposure =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                956   65.4795
Least frequent               114    7.8082
Values occured only once       0    0.0000



========================================= BsmtFinType1 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                467   31.9863
Least frequent                74    5.0685
Values occured only once       0    0.0000



========================================= BsmtFinType2 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1293   88.5616
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= Heating =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1428   97.8082
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= HeatingQC =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                742   50.8219
Least frequent                49    3.3562
Values occured only once       0    0.0000



========================================= CentralAir =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1365   93.4932
Least frequent                95    6.5068
Values occured only once       0    0.0000



========================================= Electrical =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1339   91.7123
Least frequent                27    1.8493
Values occured only once       0    0.0000



========================================= KitchenQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                735   50.3425
Least frequent                39    2.6712
Values occured only once       0    0.0000



========================================= Functional =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1366   93.5616
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= FireplaceQu =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                680   46.5753
Least frequent                33    2.2603
Values occured only once       0    0.0000



========================================= GarageType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                910   62.3288
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= GarageFinish =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                672   46.0274
Least frequent               354   24.2466
Values occured only once       0    0.0000



========================================= GarageQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1331   91.1644
Least frequent                15    1.0274
Values occured only once       0    0.0000



========================================= GarageCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1361   93.2192
Least frequent                15    1.0274
Values occured only once       0    0.0000



========================================= PavedDrive =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1340   91.7808
Least frequent                30    2.0548
Values occured only once       0    0.0000



========================================= PoolQC =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                865   59.2466
Least frequent                26    1.7808
Values occured only once       0    0.0000



========================================= Fence =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                736   50.4110
Least frequent                39    2.6712
Values occured only once       0    0.0000



========================================= MiscFeature =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1371   93.9041
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= SaleType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1267   86.7808
Least frequent                28    1.9178
Values occured only once       0    0.0000



========================================= SaleCondition =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1202   82.3288
Least frequent                12    0.8219
Values occured only once       0    0.0000



========================================= LotFrontage_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1201   82.2603
Least frequent               259   17.7397
Values occured only once       0    0.0000



========================================= Alley_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1369   93.7671
Least frequent                91    6.2329
Values occured only once       0    0.0000



========================================= MasVnrType_NA_indicator =========================================



-------------------------

This Columns is duplicate of <MasVnrArea_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1452   99.4521
Least frequent                 8    0.5479
Values occured only once       0    0.0000



========================================= MasVnrArea_NA_indicator =========================================



-------------------------

This Columns is duplicate of <MasVnrType_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1452   99.4521
Least frequent                 8    0.5479
Values occured only once       0    0.0000



========================================= BsmtQual_NA_indicator =========================================



-------------------------

This Columns is duplicate of <BsmtCond_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtFinType1_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000



========================================= BsmtCond_NA_indicator =========================================



-------------------------

This Columns is duplicate of <BsmtQual_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtFinType1_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000



========================================= BsmtExposure_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1422   97.3973
Least frequent                38    2.6027
Values occured only once       0    0.0000



========================================= BsmtFinType1_NA_indicator =========================================



-------------------------

This Columns is duplicate of <BsmtQual_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000



========================================= BsmtFinType2_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1422   97.3973
Least frequent                38    2.6027
Values occured only once       0    0.0000



========================================= FireplaceQu_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent                770   52.7397
Least frequent               690   47.2603
Values occured only once       0    0.0000



========================================= GarageType_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= GarageYrBlt_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= GarageFinish_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= GarageQual_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= GarageCond_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= Fence_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1179   80.7534
Least frequent               281   19.2466
Values occured only once       0    0.0000



========================================= MiscFeature_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1406   96.3014
Least frequent                54    3.6986
Values occured only once       0    0.0000



========================================= MSSubClass_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       14    0.9589
Len                         1460       NaN
NA                             0    0.0000
Most frequent                540   36.9863
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= OverallQual_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        8    0.5479
Len                         1460       NaN
NA                             0    0.0000
Most frequent                402   27.5342
Least frequent                18    1.2329
Values occured only once       0    0.0000



========================================= OverallCond_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent                827   56.6438
Least frequent                22    1.5068
Values occured only once       0    0.0000



========================================= LowQualFinSF_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1434   98.2192
Least frequent                26    1.7808
Values occured only once       0    0.0000



========================================= BsmtFullBath_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                857   58.6986
Least frequent                15    1.0274
Values occured only once       0    0.0000



========================================= BsmtHalfBath_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1380   94.5205
Least frequent                80    5.4795
Values occured only once       0    0.0000



========================================= FullBath_str =========================================



-------------------------

This Columns is duplicate of <FullBath> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                768   52.6027
Least frequent                 9    0.6164
Values occured only once       0    0.0000



========================================= HalfBath_str =========================================



-------------------------

This Columns is duplicate of <HalfBath> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                913   62.5342
Least frequent                12    0.8219
Values occured only once       0    0.0000



========================================= BedroomAbvGr_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                804   55.0685
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= KitchenAbvGr_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1395   95.5479
Least frequent                65    4.4521
Values occured only once       0    0.0000



========================================= TotRmsAbvGrd_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       10    0.6849
Len                         1460       NaN
NA                             0    0.0000
Most frequent                404   27.6712
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= Fireplaces_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                695   47.6027
Least frequent               115    7.8767
Values occured only once       0    0.0000



========================================= GarageCars_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                829   56.7808
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= 3SsnPorch_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1436   98.3562
Least frequent                24    1.6438
Values occured only once       0    0.0000



========================================= MiscVal_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1408   96.4384
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= MoSold_str =========================================



-------------------------

This Columns is duplicate of <MoSold> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       12    0.8219
Len                         1460       NaN
NA                             0    0.0000
Most frequent                253   17.3288
Least frequent                52    3.5616
Values occured only once       0    0.0000



========================================= YrSold_str =========================================



-------------------------

This Columns is duplicate of <YrSold> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                338   23.1507
Least frequent               175   11.9863
Values occured only once       0    0.0000



========================================= MSSubClass =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.4076567471495591)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

           Count   Ratio
Count     1460.0  100.00
NA           0.0    0.00
Mean        56.9     NaN
Std         42.3     NaN
Min         20.0     NaN
25%         20.0     NaN
50%         50.0     NaN
75%         70.0     NaN
Max        190.0     NaN
Nunique     15.0    1.03
Outlies     30.0    2.05
Nagetive     0.0    0.00
Zeros        0.0    0.00



========================================= LotFrontage =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.0120008521763144)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        70.75     NaN
Std         23.47     NaN
Min         21.00     NaN
25%         60.00     NaN
50%         70.00     NaN
75%         80.00     NaN
Max        313.00     NaN
Nunique    224.00   15.34
Outlies     14.00    0.96
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= LotArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 12.207687851233496)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

              Count   Ratio
Count       1460.00  100.00
NA             0.00    0.00
Mean       10516.83     NaN
Std         9981.26     NaN
Min         1300.00     NaN
25%         7553.50     NaN
50%         9478.50     NaN
75%        11601.50     NaN
Max       215245.00     NaN
Nunique     1073.00   73.49
Outlies       13.00    0.89
Nagetive       0.00    0.00
Zeros          0.00    0.00



========================================= OverallQual =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.2169439277628693)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.10     NaN
Std          1.38     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          7.00     NaN
Max         10.00     NaN
Nunique     10.00    0.68
Outlies      2.00    0.14
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= OverallCond =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6930674724842182)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         5.58     NaN
Std          1.11     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          5.00     NaN
75%          6.00     NaN
Max          9.00     NaN
Nunique      9.00    0.62
Outlies     28.00    1.92
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= YearBuilt =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.613461172488183)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1971.27     NaN
Std         30.20     NaN
Min       1872.00     NaN
25%       1954.00     NaN
50%       1973.00     NaN
75%       2000.00     NaN
Max       2010.00     NaN
Nunique    112.00    7.67
Outlies      6.00    0.41
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= YearRemodAdd =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.5035620027004709)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1984.87     NaN
Std         20.65     NaN
Min       1950.00     NaN
25%       1967.00     NaN
50%       1994.00     NaN
75%       2004.00     NaN
Max       2010.00     NaN
Nunique     61.00    4.18
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= MasVnrArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.6682455485578593)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       103.84     NaN
Std        180.74     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        166.00     NaN
Max       1600.00     NaN
Nunique    335.00   22.95
Outlies     32.00    2.19
Nagetive     0.00    0.00
Zeros      861.00   58.97



========================================= BsmtFinSF1 =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.685503071910789)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       443.64     NaN
Std        456.10     NaN
Min          0.00     NaN
25%          0.00     NaN
50%        383.50     NaN
75%        712.25     NaN
Max       5644.00     NaN
Nunique    637.00   43.63
Outlies      6.00    0.41
Nagetive     0.00    0.00
Zeros      467.00   31.99



========================================= BsmtFinSF2 =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.255261108933303)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        46.55     NaN
Std        161.32     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max       1474.00     NaN
Nunique    144.00    9.86
Outlies     50.00    3.42
Nagetive     0.00    0.00
Zeros     1293.00   88.56



========================================= BsmtUnfSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.9202684528039037)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       567.24     NaN
Std        441.87     NaN
Min          0.00     NaN
25%        223.00     NaN
50%        477.50     NaN
75%        808.00     NaN
Max       2336.00     NaN
Nunique    780.00   53.42
Outlies     11.00    0.75
Nagetive     0.00    0.00
Zeros      118.00    8.08



========================================= TotalBsmtSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.5242545490627664)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1057.43     NaN
Std        438.71     NaN
Min          0.00     NaN
25%        795.75     NaN
50%        991.50     NaN
75%       1298.25     NaN
Max       6110.00     NaN
Nunique    721.00   49.38
Outlies     10.00    0.68
Nagetive     0.00    0.00
Zeros       37.00    2.53



========================================= 1stFlrSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.3767566220336365)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1162.63     NaN
Std        386.59     NaN
Min        334.00     NaN
25%        882.00     NaN
50%       1087.00     NaN
75%       1391.25     NaN
Max       4692.00     NaN
Nunique    753.00   51.58
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= 2ndFlrSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.8130298163023265)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       346.99     NaN
Std        436.53     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        728.00     NaN
Max       2065.00     NaN
Nunique    417.00   28.56
Outlies      4.00    0.27
Nagetive     0.00    0.00
Zeros      829.00   56.78



========================================= LowQualFinSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 9.011341288465387)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         5.84     NaN
Std         48.62     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        572.00     NaN
Nunique     24.00    1.64
Outlies     20.00    1.37
Nagetive     0.00    0.00
Zeros     1434.00   98.22



========================================= GrLivArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.3665603560164552)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1515.46     NaN
Std        525.48     NaN
Min        334.00     NaN
25%       1129.50     NaN
50%       1464.00     NaN
75%       1776.75     NaN
Max       5642.00     NaN
Nunique    861.00   58.97
Outlies     16.00    1.10
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= BsmtFullBath =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.596066609663168)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.43     NaN
Std          0.52     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies     16.00    1.10
Nagetive     0.00    0.00
Zeros      856.00   58.63



========================================= BsmtHalfBath =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.103402697955168)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.06     NaN
Std          0.24     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max          2.00     NaN
Nunique      3.00    0.21
Outlies     82.00    5.62
Nagetive     0.00    0.00
Zeros     1378.00   94.38



========================================= FullBath =========================================



-------------------------

This Columns is duplicate of <FullBath_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.036561558402727165)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.57     NaN
Std          0.55     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          2.00     NaN
75%          2.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        9.00    0.62



========================================= HalfBath =========================================



-------------------------

This Columns is duplicate of <HalfBath_str> column
Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.675897448233722)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.38     NaN
Std          0.50     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          1.00     NaN
Max          2.00     NaN
Nunique      3.00    0.21
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros      913.00   62.53



========================================= BedroomAbvGr =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.21179009627507137)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         2.87     NaN
Std          0.82     NaN
Min          0.00     NaN
25%          2.00     NaN
50%          3.00     NaN
75%          3.00     NaN
Max          8.00     NaN
Nunique      8.00    0.55
Outlies     14.00    0.96
Nagetive     0.00    0.00
Zeros        6.00    0.41



========================================= KitchenAbvGr =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.488396777072859)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.05     NaN
Std          0.22     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          1.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies     68.00    4.66
Nagetive     0.00    0.00
Zeros        1.00    0.07



========================================= TotRmsAbvGrd =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6763408364355531)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.52     NaN
Std          1.63     NaN
Min          2.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          7.00     NaN
Max         14.00     NaN
Nunique     12.00    0.82
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= Fireplaces =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6495651830548841)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.61     NaN
Std          0.64     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          1.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies      5.00    0.34
Nagetive     0.00    0.00
Zeros      690.00   47.26



========================================= GarageYrBlt =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.541264504372725)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1977.23     NaN
Std         24.78     NaN
Min       1900.00     NaN
25%       1960.00     NaN
50%       1978.00     NaN
75%       2001.00     NaN
Max       2010.00     NaN
Nunique    148.00   10.14
Outlies      1.00    0.07
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= GarageCars =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: -0.3425489297486655)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.77     NaN
Std          0.75     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          2.00     NaN
75%          2.00     NaN
Max          4.00     NaN
Nunique      5.00    0.34
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros       81.00    5.55



========================================= GarageArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.17998090674623907)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       472.98     NaN
Std        213.80     NaN
Min          0.00     NaN
25%        334.50     NaN
50%        480.00     NaN
75%        576.00     NaN
Max       1418.00     NaN
Nunique    441.00   30.21
Outlies      7.00    0.48
Nagetive     0.00    0.00
Zeros       81.00    5.55



========================================= WoodDeckSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.5413757571931312)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        94.24     NaN
Std        125.34     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        168.00     NaN
Max        857.00     NaN
Nunique    274.00   18.77
Outlies     22.00    1.51
Nagetive     0.00    0.00
Zeros      761.00   52.12



========================================= OpenPorchSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.3643417403694404)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        46.66     NaN
Std         66.26     NaN
Min          0.00     NaN
25%          0.00     NaN
50%         25.00     NaN
75%         68.00     NaN
Max        547.00     NaN
Nunique    202.00   13.84
Outlies     27.00    1.85
Nagetive     0.00    0.00
Zeros      656.00   44.93



========================================= EnclosedPorch =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 3.08987190371177)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        21.95     NaN
Std         61.12     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        552.00     NaN
Nunique    120.00    8.22
Outlies     51.00    3.49
Nagetive     0.00    0.00
Zeros     1252.00   85.75



========================================= 3SsnPorch =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 10.304342032693112)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         3.41     NaN
Std         29.32     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        508.00     NaN
Nunique     20.00    1.37
Outlies     23.00    1.58
Nagetive     0.00    0.00
Zeros     1436.00   98.36



========================================= ScreenPorch =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.122213743143115)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        15.06     NaN
Std         55.76     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        480.00     NaN
Nunique     76.00    5.21
Outlies     55.00    3.77
Nagetive     0.00    0.00
Zeros     1344.00   92.05



========================================= PoolArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 14.828373640750588)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         2.76     NaN
Std         40.18     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        738.00     NaN
Nunique      8.00    0.55
Outlies      7.00    0.48
Nagetive     0.00    0.00
Zeros     1453.00   99.52



========================================= MiscVal =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 24.476794188821916)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

             Count   Ratio
Count      1460.00  100.00
NA            0.00    0.00
Mean         43.49     NaN
Std         496.12     NaN
Min           0.00     NaN
25%           0.00     NaN
50%           0.00     NaN
75%           0.00     NaN
Max       15500.00     NaN
Nunique      21.00    1.44
Outlies       8.00    0.55
Nagetive      0.00    0.00
Zeros      1408.00   96.44



========================================= MoSold =========================================



-------------------------

This Columns is duplicate of <MoSold_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.21205298505146022)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.32     NaN
Std          2.70     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          8.00     NaN
Max         12.00     NaN
Nunique     12.00    0.82
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= YrSold =========================================



-------------------------

This Columns is duplicate of <YrSold_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.09626851386568028)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      2007.82     NaN
Std          1.33     NaN
Min       2006.00     NaN
25%       2007.00     NaN
50%       2008.00     NaN
75%       2009.00     NaN
Max       2010.00     NaN
Nunique      5.00    0.34
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= SalePrice =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.8828757597682129)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

             Count   Ratio
Count       1460.0  100.00
NA             0.0    0.00
Mean      180921.2     NaN
Std        79442.5     NaN
Min        34900.0     NaN
25%       129975.0     NaN
50%       163000.0     NaN
75%       214000.0     NaN
Max       755000.0     NaN
Nunique      663.0   45.41
Outlies       22.0    1.51
Nagetive       0.0    0.00
Zeros          0.0    0.00


----------------------------------------------------------------------------------------------
****************************************** Modeling ******************************************

-------------------- This is Regression problem --------------------

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

 ------------------------------------- Linear Regression -------------------------------------


-------------------------

NOTE: This summary saved as <summary_OLS_1.csv>

-------------------------

                                   Variable        coef    std err      t  P>|t|      [0.025      0.975] Indicator
0                                  1stFlrSF      3.3533     12.672  0.265  0.791     -21.521      28.227          
1                                  2ndFlrSF     12.8116     11.831  1.083  0.279     -10.411      36.034          
2                                 3SsnPorch     62.6969     57.210  1.096  0.273     -49.597     174.990          
3                      3SsnPorch_str__"0.0"   1234.7586   6832.778  0.181  0.857  -12200.000   14600.000          
4                 3SsnPorch_str__Rare cases  -1239.5703   6832.838 -0.181  0.856  -14700.000   12200.000          
5               Alley_NA_indicator__Missing  -1085.4065   2249.147 -0.483  0.630   -5500.115    3329.302          
6           Alley_NA_indicator__Not missing   1080.5948   2249.139  0.480  0.631   -3334.098    5495.288          
7                               Alley__Grvl  -3292.4841   1581.348 -2.082  0.038   -6396.411    -188.557         *
8                               Alley__Pave   3287.6724   1581.357  2.079  0.038     183.728    6391.617         *
9                              BedroomAbvGr  -2917.6677   5274.186 -0.553  0.580  -13300.000    7434.697          
10                  BedroomAbvGr_str__"1.0"   6236.4742  12700.000  0.490  0.624  -18700.000   31200.000          
11                  BedroomAbvGr_str__"2.0"   2998.9900   7567.986  0.396  0.692  -11900.000   17900.000          
12                  BedroomAbvGr_str__"3.0"    -45.6206   3666.690 -0.012  0.990   -7242.734    7151.492          
13                  BedroomAbvGr_str__"4.0"   7076.2427   4786.581  1.478  0.140   -2319.033   16500.000          
14                  BedroomAbvGr_str__"5.0" -25690.0000  11600.000 -2.222  0.027  -48400.000   -3000.884         *
15             BedroomAbvGr_str__Rare cases   9414.4073  12500.000  0.756  0.450  -15000.000   33900.000          
16                           BldgType__1Fam  -7985.6601  12900.000 -0.617  0.537  -33400.000   17400.000          
17                         BldgType__2fmCon   3088.0825  26800.000  0.115  0.908  -49600.000   55800.000          
18                         BldgType__Duplex  -2900.9887   6260.765 -0.463  0.643  -15200.000    9387.871          
19                          BldgType__Twnhs   -115.7867  14600.000 -0.008  0.994  -28800.000   28500.000          
20                         BldgType__TwnhsE   7909.5411  13600.000  0.580  0.562  -18900.000   34700.000          
21           BsmtCond_NA_indicator__Missing  -1686.3679   1149.566 -1.467  0.143   -3942.779     570.043          
22       BsmtCond_NA_indicator__Not missing   1681.5562   1149.320  1.463  0.144    -574.371    3937.484          
23                             BsmtCond__Fa  -1555.1252   3873.944 -0.401  0.688   -9159.044    6048.793          
24                             BsmtCond__Gd   -956.1862   3339.981 -0.286  0.775   -7512.023    5599.650          
25                             BsmtCond__TA   2506.4996   2387.370  1.050  0.294   -2179.518    7192.517          
26       BsmtExposure_NA_indicator__Missing  -1686.3679   1149.566 -1.467  0.143   -3942.779     570.043          
27   BsmtExposure_NA_indicator__Not missing   1681.5562   1149.320  1.463  0.144    -574.371    3937.484          
28                         BsmtExposure__Av  -3420.6587   2177.236 -1.571  0.117   -7694.217     852.900          
29                         BsmtExposure__Gd  17150.0000   3032.425  5.655  0.000   11200.000   23100.000       ***
30                         BsmtExposure__Mn  -6291.4475   2836.058 -2.218  0.027  -11900.000    -724.730         *
31                         BsmtExposure__No  -7440.6207   1880.599 -3.957  0.000  -11100.000   -3749.313       ***
32                               BsmtFinSF1     -3.8000      4.822 -0.788  0.431     -13.265       5.665          
33                               BsmtFinSF2     15.8494     12.050  1.315  0.189      -7.803      39.502          
34       BsmtFinType1_NA_indicator__Missing  -1686.3679   1149.566 -1.467  0.143   -3942.779     570.043          
35   BsmtFinType1_NA_indicator__Not missing   1681.5562   1149.320  1.463  0.144    -574.371    3937.484          
36                        BsmtFinType1__ALQ   1715.1004   2385.417  0.719  0.472   -2967.083    6397.284          
37                        BsmtFinType1__BLQ   1345.2990   2797.656  0.481  0.631   -4146.042    6836.640          
38                        BsmtFinType1__GLQ   5541.1455   2624.123  2.112  0.035     390.420   10700.000         *
39                        BsmtFinType1__LwQ  -4415.2560   3937.358 -1.121  0.262  -12100.000    3313.135          
40                        BsmtFinType1__Rec    750.7314   2905.428  0.258  0.796   -4952.150    6453.612          
41                        BsmtFinType1__Unf  -4941.8320   2656.778 -1.860  0.063  -10200.000     272.989         .
42       BsmtFinType2_NA_indicator__Missing  -1686.3679   1149.566 -1.467  0.143   -3942.779     570.043          
43   BsmtFinType2_NA_indicator__Not missing   1681.5562   1149.320  1.463  0.144    -574.371    3937.484          
44                        BsmtFinType2__ALQ  -3704.7676   7524.530 -0.492  0.623  -18500.000   11100.000          
45                        BsmtFinType2__BLQ   2027.5973   5835.434  0.347  0.728   -9426.406   13500.000          
46                        BsmtFinType2__GLQ   3743.2330   8508.306  0.440  0.660  -13000.000   20400.000          
47                        BsmtFinType2__LwQ  -2940.0163   5289.925 -0.556  0.579  -13300.000    7443.243          
48                        BsmtFinType2__Rec  -4366.9981   4419.232 -0.988  0.323  -13000.000    4307.233          
49                        BsmtFinType2__Unf   5236.1399   4893.554  1.070  0.285   -4369.107   14800.000          
50                             BsmtFullBath  -2110.4208  11500.000 -0.184  0.854  -24700.000   20400.000          
51                  BsmtFullBath_str__"0.0"   5264.9100  11800.000  0.447  0.655  -17800.000   28400.000          
52                  BsmtFullBath_str__"1.0"  12670.0000   4060.835  3.120  0.002    4699.509   20600.000        **
53                  BsmtFullBath_str__"2.0" -17940.0000  13300.000 -1.352  0.177  -44000.000    8104.623          
54                             BsmtHalfBath  -5754.7820  12900.000 -0.446  0.656  -31100.000   19600.000          
55                  BsmtHalfBath_str__"0.0"  -6809.4809   6713.388 -1.014  0.311  -20000.000    6367.804          
56                  BsmtHalfBath_str__"1.0"   6804.6691   6713.165  1.014  0.311   -6372.177   20000.000          
57           BsmtQual_NA_indicator__Missing  -1686.3679   1149.566 -1.467  0.143   -3942.779     570.043          
58       BsmtQual_NA_indicator__Not missing   1681.5562   1149.320  1.463  0.144    -574.371    3937.484          
59                             BsmtQual__Ex   4381.7531   4145.575  1.057  0.291   -3755.334   12500.000          
60                             BsmtQual__Fa   2477.9285   4913.453  0.504  0.614   -7166.376   12100.000          
61                             BsmtQual__Gd  -4594.7921   2609.322 -1.761  0.079   -9716.465     526.881         .
62                             BsmtQual__TA  -2269.7012   2554.383 -0.889  0.375   -7283.539    2744.136          
63                                BsmtUnfSF     -7.4200      5.172 -1.435  0.152     -17.572       2.732          
64                            CentralAir__N  -1657.8733   2821.606 -0.588  0.557   -7196.226    3880.479          
65                            CentralAir__Y   1653.0615   2821.659  0.586  0.558   -3885.394    7191.517          
66                       Condition1__Artery  -5304.5805   5234.016 -1.013  0.311  -15600.000    4968.939          
67                        Condition1__Feedr  -5092.7408   4322.090 -1.178  0.239  -13600.000    3390.816          
68                         Condition1__Norm   6516.3363   2759.200  2.362  0.018    1100.478   11900.000         *
69                         Condition1__PosN   9265.9151   7054.506  1.313  0.189   -4580.929   23100.000          
70                         Condition1__RRAe -20840.0000   8788.252 -2.372  0.018  -38100.000   -3593.152         *
71                         Condition1__RRAn   4027.1131   6063.362  0.664  0.507   -7874.277   15900.000          
72                   Condition1__Rare cases  11430.0000   8216.296  1.391  0.165   -4701.047   27600.000          
73                         Condition2__Norm   3280.6809   5208.090  0.630  0.529   -6941.950   13500.000          
74                   Condition2__Rare cases  -3285.4926   5208.143 -0.631  0.528  -13500.000    6937.242          
75                        Electrical__FuseA  -1218.9711   3496.674 -0.349  0.727   -8082.372    5644.429          
76                        Electrical__FuseF   4510.1330   5273.796  0.855  0.393   -5841.468   14900.000          
77                        Electrical__SBrkr  -3295.9736   3167.700 -1.040  0.298   -9513.651    2921.704          
78                            EnclosedPorch     26.0007     17.333  1.500  0.134      -8.022      60.023          
79                            ExterCond__Fa   5446.2409   5444.979  1.000  0.317   -5241.363   16100.000          
80                            ExterCond__Gd  -4886.0212   3426.984 -1.426  0.154  -11600.000    1840.589          
81                            ExterCond__TA   -565.0314   2912.153 -0.194  0.846   -6281.112    5151.050          
82                            ExterQual__Ex   7912.3072   6369.204  1.242  0.214   -4589.399   20400.000          
83                            ExterQual__Fa  -7393.1494  10600.000 -0.698  0.486  -28200.000   13400.000          
84                            ExterQual__Gd   3000.4397   4246.016  0.707  0.480   -5333.795   11300.000          
85                            ExterQual__TA  -3524.4092   4204.279 -0.838  0.402  -11800.000    4727.903          
86                     Exterior1st__AsbShng  20590.0000  21300.000  0.965  0.335  -21300.000   62500.000          
87                     Exterior1st__BrkFace   8714.9218   7230.872  1.205  0.228   -5478.099   22900.000          
88                     Exterior1st__CemntBd   4514.6631  26600.000  0.170  0.865  -47800.000   56800.000          
89                     Exterior1st__HdBoard  -2945.6319   6721.156 -0.438  0.661  -16100.000   10200.000          
90                     Exterior1st__MetalSd    943.3501  10500.000  0.090  0.928  -19600.000   21500.000          
91                     Exterior1st__Plywood   2388.9515   6814.090  0.351  0.726  -11000.000   15800.000          
92                      Exterior1st__Stucco -25930.0000  12400.000 -2.084  0.037  -50300.000   -1507.352         *
93                     Exterior1st__VinylSd -10190.0000   7881.645 -1.293  0.196  -25700.000    5280.509          
94                     Exterior1st__Wd Sdng   2352.3249   6164.913  0.382  0.703   -9748.393   14500.000          
95                     Exterior1st__WdShing   -446.8280   8364.569 -0.053  0.957  -16900.000   16000.000          
96                     Exterior2nd__AsbShng -27030.0000  20300.000 -1.328  0.184  -67000.000   12900.000          
97                     Exterior2nd__BrkFace    545.6956   9286.086  0.059  0.953  -17700.000   18800.000          
98                     Exterior2nd__CmentBd  -3320.5107  27700.000 -0.120  0.905  -57700.000   51000.000          
99                     Exterior2nd__HdBoard   2969.5810   6471.508  0.459  0.646   -9732.933   15700.000          
100                    Exterior2nd__ImStucc  10310.0000  10400.000  0.987  0.324  -10200.000   30800.000          
101                    Exterior2nd__MetalSd  -2047.8014  10600.000 -0.193  0.847  -22900.000   18800.000          
102                    Exterior2nd__Plywood  -3891.5240   5989.464 -0.650  0.516  -15600.000    7864.816          
103                 Exterior2nd__Rare cases    620.0915   9203.667  0.067  0.946  -17400.000   18700.000          
104                     Exterior2nd__Stucco   8899.7887  12000.000  0.741  0.459  -14700.000   32500.000          
105                    Exterior2nd__VinylSd  10370.0000   7506.844  1.381  0.168   -4367.347   25100.000          
106                    Exterior2nd__Wd Sdng   -586.5532   5819.852 -0.101  0.920  -12000.000   10800.000          
107                    Exterior2nd__Wd Shng   3160.2507   7486.242  0.422  0.673  -11500.000   17900.000          
108             Fence_NA_indicator__Missing  -1072.0532   1236.245 -0.867  0.386   -3498.600    1354.494          
109         Fence_NA_indicator__Not missing   1067.2415   1236.279  0.863  0.388   -1359.371    3493.854          
110                            Fence__GdPrv   -247.4536   2341.235 -0.106  0.916   -4842.915    4348.008          
111                             Fence__GdWo    175.8074   2394.677  0.073  0.941   -4524.552    4876.167          
112                            Fence__MnPrv   4573.9918   1961.358  2.332  0.020     724.167    8423.816         *
113                             Fence__MnWw  -4507.1574   4493.200 -1.003  0.316  -13300.000    4312.260          
114       FireplaceQu_NA_indicator__Missing   -418.7540    472.614 -0.886  0.376   -1346.418     508.911          
115   FireplaceQu_NA_indicator__Not missing    413.9423    472.561  0.876  0.381    -513.618    1341.502          
116                         FireplaceQu__Ex  -6437.8767   4713.071 -1.366  0.172  -15700.000    2813.112          
117                         FireplaceQu__Fa    847.0908   3182.469  0.266  0.790   -5399.576    7093.758          
118                         FireplaceQu__Gd   1250.5352   2026.104  0.617  0.537   -2726.376    5227.447          
119                         FireplaceQu__Po   2884.2039   3306.821  0.872  0.383   -3606.545    9374.953          
120                         FireplaceQu__TA   1451.2351   2031.248  0.714  0.475   -2535.773    5438.244          
121                              Fireplaces -21290.0000   4621.446 -4.607  0.000  -30400.000  -12200.000       ***
122                   Fireplaces_str__"0.0" -26430.0000   4645.656 -5.688  0.000  -35500.000  -17300.000       ***
123                   Fireplaces_str__"1.0"  -3889.3006   1411.020 -2.756  0.006   -6658.902   -1119.700        **
124                   Fireplaces_str__"2.0"  30310.0000   5025.302  6.032  0.000   20400.000   40200.000       ***
125                      Foundation__BrkTil   3354.3688   4646.104  0.722  0.471   -5765.174   12500.000          
126                      Foundation__CBlock   3792.7502   4010.990  0.946  0.345   -4080.167   11700.000          
127                       Foundation__PConc   6434.8297   4207.122  1.530  0.127   -1823.064   14700.000          
128                  Foundation__Rare cases  -8527.0468  10800.000 -0.791  0.429  -29700.000   12600.000          
129                        Foundation__Slab  -5059.7136   9965.919 -0.508  0.612  -24600.000   14500.000          
130                                FullBath   7199.5122   6168.302  1.167  0.243   -4907.857   19300.000          
131                     FullBath_str__"1.0" -13640.0000   8756.263 -1.558  0.120  -30800.000    3545.430          
132                     FullBath_str__"2.0" -16610.0000   3933.742 -4.222  0.000  -24300.000   -8885.256       ***
133                     FullBath_str__"3.0"  18020.0000   3815.521  4.722  0.000   10500.000   25500.000       ***
134                FullBath_str__Rare cases  12230.0000   8665.817  1.411  0.159   -4784.260   29200.000          
135                        Functional__Maj1 -11520.0000   8675.229 -1.328  0.185  -28500.000    5510.540          
136                        Functional__Min1  -1110.2593   6317.184 -0.176  0.861  -13500.000   11300.000          
137                        Functional__Min2   1755.2182   5888.496  0.298  0.766   -9802.937   13300.000          
138                         Functional__Mod  -2306.9353   8771.815 -0.263  0.793  -19500.000   14900.000          
139                         Functional__Typ  13170.0000   3893.289  3.384  0.001    5532.793   20800.000        **
140                              GarageArea    -21.1991     11.653 -1.819  0.069     -44.072       1.674         .
141                              GarageCars  21320.0000   7207.156  2.959  0.003    7175.950   35500.000        **
142                   GarageCars_str__"0.0"   2450.9042   1350.987  1.814  0.070    -200.863    5102.671         .
143                   GarageCars_str__"1.0"   6984.3607   7024.437  0.994  0.320   -6803.461   20800.000          
144                   GarageCars_str__"2.0"  -5461.2707   1745.080 -3.130  0.002   -8886.577   -2035.964        **
145                   GarageCars_str__"3.0"  -3978.8059   7916.515 -0.503  0.615  -19500.000   11600.000          
146        GarageCond_NA_indicator__Missing   2450.9042   1350.987  1.814  0.070    -200.863    5102.671         .
147    GarageCond_NA_indicator__Not missing  -2455.7159   1350.984 -1.818  0.069   -5107.477     196.045         .
148                          GarageCond__Fa   2161.9357   4778.123  0.452  0.651   -7216.739   11500.000          
149                          GarageCond__Gd  -3452.5931   7309.252 -0.472  0.637  -17800.000   10900.000          
150                          GarageCond__Po  -8412.5891   6823.765 -1.233  0.218  -21800.000    4981.346          
151                          GarageCond__TA   9698.4348   4136.154  2.345  0.019    1579.840   17800.000         *
152      GarageFinish_NA_indicator__Missing   2450.9042   1350.987  1.814  0.070    -200.863    5102.671         .
153  GarageFinish_NA_indicator__Not missing  -2455.7159   1350.984 -1.818  0.069   -5107.477     196.045         .
154                       GarageFinish__Fin   1947.3572   1732.519  1.124  0.261   -1453.295    5348.010          
155                       GarageFinish__RFn    169.2467   1478.740  0.114  0.909   -2733.278    3071.772          
156                       GarageFinish__Unf  -2121.4157   1819.537 -1.166  0.244   -5692.870    1450.038          
157        GarageQual_NA_indicator__Missing   2450.9042   1350.987  1.814  0.070    -200.863    5102.671         .
158    GarageQual_NA_indicator__Not missing  -2455.7159   1350.984 -1.818  0.069   -5107.477     196.045         .
159                          GarageQual__Fa  -7215.6312   4739.775 -1.522  0.128  -16500.000    2087.773          
160                          GarageQual__Gd   8306.8742   6096.541  1.363  0.173   -3659.640   20300.000          
161                          GarageQual__TA  -1096.0547   3643.802 -0.301  0.764   -8248.242    6056.133          
162        GarageType_NA_indicator__Missing   2450.9042   1350.987  1.814  0.070    -200.863    5102.671         .
163    GarageType_NA_indicator__Not missing  -2455.7159   1350.984 -1.818  0.069   -5107.477     196.045         .
164                      GarageType__Attchd  -3781.9543   3414.817 -1.108  0.268  -10500.000    2920.774          
165                     GarageType__Basment   6734.5572   7654.963  0.880  0.379   -8290.885   21800.000          
166                     GarageType__BuiltIn  -9104.0738   4859.471 -1.873  0.061  -18600.000     434.275         .
167                     GarageType__CarPort   3361.5580   9745.437  0.345  0.730  -15800.000   22500.000          
168                      GarageType__Detchd   2785.1012   3618.666  0.770  0.442   -4317.749    9887.952          
169                             GarageYrBlt    -26.2058     84.117 -0.312  0.755    -191.313     138.902          
170       GarageYrBlt_NA_indicator__Missing   2450.9042   1350.987  1.814  0.070    -200.863    5102.671         .
171   GarageYrBlt_NA_indicator__Not missing  -2455.7159   1350.984 -1.818  0.069   -5107.477     196.045         .
172                               GrLivArea     54.0179     12.266  4.404  0.000      29.941      78.094       ***
173                                HalfBath  -2823.3845   4441.158 -0.636  0.525  -11500.000    5893.882          
174                     HalfBath_str__"0.0"  -1204.6132   1038.534 -1.160  0.246   -3243.085     833.859          
175                     HalfBath_str__"1.0"   5222.9874   4463.390  1.170  0.242   -3537.918   14000.000          
176                     HalfBath_str__"2.0"  -4023.1860   4329.444 -0.929  0.353  -12500.000    4474.806          
177                           HeatingQC__Ex  -1277.0599   2282.880 -0.559  0.576   -5757.980    3203.860          
178                           HeatingQC__Fa   3111.6972   4548.069  0.684  0.494   -5815.420   12000.000          
179                           HeatingQC__Gd  -1511.9651   2311.765 -0.654  0.513   -6049.582    3025.652          
180                           HeatingQC__TA   -327.4839   2085.814 -0.157  0.875   -4421.596    3766.628          
181                           Heating__GasA  -1630.6700   5577.244 -0.292  0.770  -12600.000    9316.549          
182                           Heating__GasW   1183.3280   8255.308  0.143  0.886  -15000.000   17400.000          
183                     Heating__Rare cases    442.5303   8599.721  0.051  0.959  -16400.000   17300.000          
184                      HouseStyle__1.5Fin   2799.7658  12100.000  0.232  0.817  -20900.000   26500.000          
185                      HouseStyle__1.5Unf  14410.0000  21100.000  0.682  0.495  -27100.000   55900.000          
186                      HouseStyle__1Story   9975.3242   9679.832  1.031  0.303   -9024.604   29000.000          
187                      HouseStyle__2.5Unf -28630.0000  17900.000 -1.600  0.110  -63700.000    6489.714          
188                      HouseStyle__2Story  -1020.5285   8835.439 -0.116  0.908  -18400.000   16300.000          
189                  HouseStyle__Rare cases -12580.0000  21600.000 -0.584  0.560  -54900.000   29700.000          
190                      HouseStyle__SFoyer  17740.0000  13300.000  1.329  0.184   -8449.598   43900.000          
191                        HouseStyle__SLvl  -2696.5039  15500.000 -0.174  0.862  -33200.000   27800.000          
192                            KitchenAbvGr -11620.0000  11100.000 -1.045  0.296  -33400.000   10200.000          
193                 KitchenAbvGr_str__"1.0"  -1107.6733   6875.684 -0.161  0.872  -14600.000   12400.000          
194                 KitchenAbvGr_str__"2.0"   1102.8615   6875.539  0.160  0.873  -12400.000   14600.000          
195                         KitchenQual__Ex  13290.0000   4059.540  3.273  0.001    5319.672   21300.000        **
196                         KitchenQual__Fa   1866.6839   5950.301  0.314  0.754   -9812.784   13500.000          
197                         KitchenQual__Gd  -8384.4831   2678.421 -3.130  0.002  -13600.000   -3127.180        **
198                         KitchenQual__TA  -6774.8987   2672.687 -2.535  0.011  -12000.000   -1528.851         *
199                        LandContour__Bnk  -7450.7425   4339.721 -1.717  0.086  -16000.000    1067.421         .
200                        LandContour__HLS   8469.4449   4556.821  1.859  0.063    -474.850   17400.000         .
201                        LandContour__Low  -3556.6689   6141.224 -0.579  0.563  -15600.000    8497.551          
202                        LandContour__Lvl   2533.1548   3149.840  0.804  0.422   -3649.466    8715.776          
203                          LandSlope__Gtl  10450.0000   6155.984  1.698  0.090   -1632.559   22500.000         .
204                          LandSlope__Mod  13480.0000   5956.494  2.263  0.024    1785.724   25200.000         *
205                          LandSlope__Sev -23930.0000  10600.000 -2.252  0.025  -44800.000   -3072.523         *
206                                 LotArea      0.5264      0.167  3.149  0.002       0.198       0.855        **
207                       LotConfig__Corner    953.6597   2331.865  0.409  0.683   -3623.410    5530.729          
208                      LotConfig__CulDSac   8850.0494   3077.143  2.876  0.004    2810.121   14900.000        **
209                          LotConfig__FR2  -9638.9963   3871.384 -2.490  0.013  -17200.000   -2040.102         *
210                       LotConfig__Inside   -169.5245   1907.299 -0.089  0.929   -3913.241    3574.192          
211                             LotFrontage   -102.5066     64.470 -1.590  0.112    -229.050      24.037          
212       LotFrontage_NA_indicator__Missing    578.6439   1315.690  0.440  0.660   -2003.840    3161.127          
213   LotFrontage_NA_indicator__Not missing   -583.4556   1315.708 -0.443  0.658   -3165.976    1999.064          
214                           LotShape__IR1   8042.0548   3412.920  2.356  0.019    1343.051   14700.000         *
215                           LotShape__IR2   2345.1283   5030.765  0.466  0.641   -7529.442   12200.000          
216                           LotShape__IR3 -19880.0000   8730.671 -2.277  0.023  -37000.000   -2738.846         *
217                           LotShape__Reg   9483.7324   3592.427  2.640  0.008    2432.386   16500.000        **
218                            LowQualFinSF     37.8531     33.607  1.126  0.260     -28.111     103.817          
219                 LowQualFinSF_str__"0.0"   6051.7586   7010.819  0.863  0.388   -7709.334   19800.000          
220            LowQualFinSF_str__Rare cases  -6056.5703   7010.754 -0.864  0.388  -19800.000    7704.395          
221                              MSSubClass    371.9716    908.882  0.409  0.682   -1412.016    2155.959          
222                 MSSubClass_str__"120.0" -31800.0000  32000.000 -0.994  0.321  -94600.000   31000.000          
223                 MSSubClass_str__"160.0" -63070.0000  66600.000 -0.947  0.344 -194000.000   67700.000          
224                 MSSubClass_str__"180.0" -68040.0000  85200.000 -0.799  0.425 -235000.000   99200.000          
225                 MSSubClass_str__"190.0" -41950.0000  96800.000 -0.433  0.665 -232000.000  148000.000          
226                  MSSubClass_str__"20.0"  41870.0000  64100.000  0.653  0.514  -83900.000  168000.000          
227                  MSSubClass_str__"30.0"  32760.0000  56300.000  0.582  0.561  -77700.000  143000.000          
228                  MSSubClass_str__"45.0"  26740.0000  46200.000  0.579  0.563  -63900.000  117000.000          
229                  MSSubClass_str__"50.0"  23330.0000  37800.000  0.617  0.538  -50900.000   97600.000          
230                  MSSubClass_str__"60.0"  11150.0000  28200.000  0.395  0.693  -44200.000   66500.000          
231                  MSSubClass_str__"70.0"  20410.0000  21100.000  0.965  0.335  -21100.000   61900.000          
232                  MSSubClass_str__"75.0"  38020.0000  25700.000  1.477  0.140  -12500.000   88500.000          
233                  MSSubClass_str__"80.0"  21480.0000  18300.000  1.173  0.241  -14500.000   57400.000          
234                  MSSubClass_str__"85.0"  -7987.7693  14700.000 -0.544  0.587  -36800.000   20800.000          
235                  MSSubClass_str__"90.0"  -2900.9887   6260.765 -0.463  0.643  -15200.000    9387.871          
236                       MSZoning__C (all) -28550.0000  11900.000 -2.404  0.016  -51900.000   -5242.391         *
237                            MSZoning__FV  20330.0000   7975.915  2.549  0.011    4676.916   36000.000         *
238                            MSZoning__RH   3818.1883   8386.567  0.455  0.649  -12600.000   20300.000          
239                            MSZoning__RL   4334.1426   4438.319  0.977  0.329   -4377.552   13000.000          
240                            MSZoning__RM     60.6392   5026.645  0.012  0.990   -9805.843    9927.122          
241                              MasVnrArea      8.8044      8.307  1.060  0.290      -7.502      25.110          
242    MasVnrArea_NA_indicator__Not missing   3783.0955   2906.879  1.301  0.193   -1922.632    9488.823          
243     MasVnrArea_NA_indicator__Rare cases  -3787.9072   2906.902 -1.303  0.193   -9493.681    1917.866          
244    MasVnrType_NA_indicator__Not missing   3783.0955   2906.879  1.301  0.193   -1922.632    9488.823          
245     MasVnrType_NA_indicator__Rare cases  -3787.9072   2906.902 -1.303  0.193   -9493.681    1917.866          
246                      MasVnrType__BrkCmn -15160.0000   7227.306 -2.097  0.036  -29300.000    -969.060         *
247                     MasVnrType__BrkFace   2572.3989   2916.086  0.882  0.378   -3151.402    8296.200          
248                        MasVnrType__None   5567.4248   3104.407  1.793  0.073    -526.020   11700.000         .
249                       MasVnrType__Stone   7010.4457   3624.076  1.934  0.053    -103.024   14100.000         .
250       MiscFeature_NA_indicator__Missing  -1342.9831  11200.000 -0.120  0.905  -23300.000   20600.000          
251   MiscFeature_NA_indicator__Not missing   1338.1714  11200.000  0.119  0.905  -20600.000   23300.000          
252                 MiscFeature__Rare cases    193.2522  12500.000  0.015  0.988  -24400.000   24700.000          
253                       MiscFeature__Shed   4382.1547   6879.837  0.637  0.524   -9121.841   17900.000          
254                       MiscFeature__TenC  -4580.2187   8242.321 -0.556  0.579  -20800.000   11600.000          
255                                 MiscVal      4.5326      5.027  0.902  0.368      -5.335      14.400          
256                      MiscVal_str__"0.0"   6867.6642  15700.000  0.436  0.663  -24000.000   37800.000          
257                    MiscVal_str__"400.0"  -2507.4066  10400.000 -0.241  0.809  -22900.000   17900.000          
258                 MiscVal_str__Rare cases  -4365.0694   9388.219 -0.465  0.642  -22800.000   14100.000          
259                                  MoSold    -40.2678    343.999 -0.117  0.907    -715.481     634.946          
260                       MoSold_str__"1.0"   -356.6960   3556.012 -0.100  0.920   -7336.566    6623.174          
261                      MoSold_str__"10.0"  -4000.7022   3276.907 -1.221  0.222  -10400.000    2431.331          
262                      MoSold_str__"11.0"   5188.0677   3348.288  1.549  0.122   -1384.075   11800.000          
263                      MoSold_str__"12.0"   -341.1583   3404.138 -0.100  0.920   -7022.926    6340.609          
264                       MoSold_str__"2.0"  -2252.7998   3723.738 -0.605  0.545   -9561.890    5056.290          
265                       MoSold_str__"3.0"    227.8194   3089.357  0.074  0.941   -5836.084    6291.723          
266                       MoSold_str__"4.0"    228.5182   2864.891  0.080  0.936   -5394.795    5851.831          
267                       MoSold_str__"5.0"   3079.9956   2488.910  1.237  0.216   -1805.328    7965.320          
268                       MoSold_str__"6.0"   1490.7548   2188.979  0.681  0.496   -2805.854    5787.363          
269                       MoSold_str__"7.0"   3834.7253   2211.987  1.734  0.083    -507.044    8176.495         .
270                       MoSold_str__"8.0"  -2999.1183   2984.160 -1.005  0.315   -8856.537    2858.300          
271                       MoSold_str__"9.0"  -4104.2182   3905.217 -1.051  0.294  -11800.000    3561.084          
272                   Neighborhood__Blmngtn  -2405.0816   9971.748 -0.241  0.809  -22000.000   17200.000          
273                    Neighborhood__BrDale   9851.3805  11200.000  0.877  0.381  -12200.000   31900.000          
274                   Neighborhood__BrkSide  -2760.2576   6654.659 -0.415  0.678  -15800.000   10300.000          
275                   Neighborhood__ClearCr   5860.3200   7524.168  0.779  0.436   -8908.393   20600.000          
276                   Neighborhood__CollgCr    429.6268   3933.180  0.109  0.913   -7290.562    8149.816          
277                   Neighborhood__Crawfor   8207.5569   6146.625  1.335  0.182   -3857.265   20300.000          
278                   Neighborhood__Edwards -20340.0000   4167.733 -4.881  0.000  -28500.000  -12200.000       ***
279                   Neighborhood__Gilbert  -1724.1870   5054.816 -0.341  0.733  -11600.000    8197.591          
280                    Neighborhood__IDOTRR -10990.0000   8823.758 -1.245  0.213  -28300.000    6333.598          
281                   Neighborhood__MeadowV  -3435.2006  12700.000 -0.271  0.786  -28300.000   21400.000          
282                   Neighborhood__Mitchel -10220.0000   5083.734 -2.010  0.045  -20200.000    -240.406         *
283                     Neighborhood__NAmes  -9022.0852   3677.284 -2.453  0.014  -16200.000   -1804.177         *
284                    Neighborhood__NWAmes -15400.0000   5647.824 -2.727  0.007  -26500.000   -4315.506        **
285                   Neighborhood__NoRidge  33900.0000   6727.434  5.039  0.000   20700.000   47100.000       ***
286                   Neighborhood__NridgHt  23540.0000   5696.181  4.133  0.000   12400.000   34700.000       ***
287                   Neighborhood__OldTown -13710.0000   6833.774 -2.006  0.045  -27100.000    -291.589         *
288                Neighborhood__Rare cases   6678.2466  11500.000  0.579  0.562  -15900.000   29300.000          
289                     Neighborhood__SWISU -24000.0000   8792.284 -2.730  0.006  -41300.000   -6745.482        **
290                    Neighborhood__Sawyer  -5879.9162   4775.703 -1.231  0.219  -15300.000    3494.008          
291                   Neighborhood__SawyerW   1433.9447   4828.371  0.297  0.767   -8043.360   10900.000          
292                   Neighborhood__Somerst  -1250.2866   7918.644 -0.158  0.875  -16800.000   14300.000          
293                   Neighborhood__StoneBr  34010.0000   7888.393  4.311  0.000   18500.000   49500.000       ***
294                    Neighborhood__Timber -11840.0000   6608.489 -1.792  0.073  -24800.000    1128.047         .
295                   Neighborhood__Veenker   9057.7049   9525.296  0.951  0.342   -9638.896   27800.000          
296                             OpenPorchSF     23.5879     15.876  1.486  0.138      -7.574      54.750          
297                             OverallCond   1754.3417   6936.585  0.253  0.800  -11900.000   15400.000          
298                  OverallCond_str__"3.0" -15170.0000  21600.000 -0.704  0.482  -57500.000   27100.000          
299                  OverallCond_str__"4.0"  -7764.8147  14800.000 -0.524  0.600  -36800.000   21300.000          
300                  OverallCond_str__"5.0"  -5589.7146   7867.117 -0.711  0.478  -21000.000    9852.151          
301                  OverallCond_str__"6.0"   2043.1315   2814.799  0.726  0.468   -3481.860    7568.123          
302                  OverallCond_str__"7.0"   7823.9711   7622.835  1.026  0.305   -7138.408   22800.000          
303                  OverallCond_str__"8.0"   9548.4183  14600.000  0.655  0.513  -19100.000   38200.000          
304                  OverallCond_str__"9.0"   9109.0698  22000.000  0.415  0.678  -34000.000   52200.000          
305                             OverallQual   1983.6366   6791.723  0.292  0.770  -11300.000   15300.000          
306                 OverallQual_str__"10.0"  55500.0000  24800.000  2.233  0.026    6721.551  104000.000         *
307                  OverallQual_str__"3.0" -17780.0000  24100.000 -0.739  0.460  -65000.000   29500.000          
308                  OverallQual_str__"4.0" -14290.0000  16800.000 -0.849  0.396  -47300.000   18800.000          
309                  OverallQual_str__"5.0" -20170.0000  10900.000 -1.854  0.064  -41500.000    1183.551         .
310                  OverallQual_str__"6.0" -19490.0000   4815.120 -4.048  0.000  -28900.000  -10000.000       ***
311                  OverallQual_str__"7.0" -14130.0000   4552.060 -3.104  0.002  -23100.000   -5196.601        **
312                  OverallQual_str__"8.0"  -1085.6766  10400.000 -0.104  0.917  -21500.000   19300.000          
313                  OverallQual_str__"9.0"  31450.0000  17600.000  1.782  0.075   -3185.175   66100.000         .
314                           PavedDrive__N   3136.1507   3477.582  0.902  0.367   -3689.775    9962.077          
315                           PavedDrive__P  -3798.6032   4199.782 -0.904  0.366  -12000.000    4444.883          
316                           PavedDrive__Y    657.6408   2779.464  0.237  0.813   -4797.993    6113.274          
317                                PoolArea     -8.1456     26.330 -0.309  0.757     -59.827      43.536          
318                              PoolQC__Ex  -3243.4115   3222.878 -1.006  0.315   -9569.395    3082.572          
319                              PoolQC__Fa   8739.4732   5773.192  1.514  0.130   -2592.359   20100.000          
320                              PoolQC__Gd  -5500.8734   3223.896 -1.706  0.088  -11800.000     827.108         .
321                       RoofMatl__CompShg   8915.2422   7608.402  1.172  0.242   -6018.808   23800.000          
322                    RoofMatl__Rare cases  13930.0000   7889.193  1.766  0.078   -1555.656   29400.000         .
323                       RoofMatl__Tar&Grv -22850.0000  12000.000 -1.902  0.058  -46400.000     729.984         .
324                         RoofStyle__Flat  11000.0000  15600.000  0.706  0.480  -19600.000   41600.000          
325                        RoofStyle__Gable  -8087.0594   5292.277 -1.528  0.127  -18500.000    2300.817          
326                      RoofStyle__Gambrel  -3055.7320   9425.583 -0.324  0.746  -21600.000   15400.000          
327                          RoofStyle__Hip  -7536.8044   5552.231 -1.357  0.175  -18400.000    3361.318          
328                   RoofStyle__Rare cases   7675.3582  10800.000  0.711  0.477  -13500.000   28900.000          
329                  SaleCondition__Abnorml  -4760.7094   5480.135 -0.869  0.385  -15500.000    5995.900          
330                   SaleCondition__Alloca    550.5266  11200.000  0.049  0.961  -21500.000   22600.000          
331                   SaleCondition__Family   1800.9023   7119.335  0.253  0.800  -12200.000   15800.000          
332                   SaleCondition__Normal   1920.4209   4757.179  0.404  0.687   -7417.144   11300.000          
333                  SaleCondition__Partial    484.0479  14000.000  0.035  0.972  -26900.000   27900.000          
334                           SaleType__COD  -4057.7432   6637.617 -0.611  0.541  -17100.000    8970.816          
335                           SaleType__New   6082.6969  13100.000  0.463  0.643  -19700.000   31900.000          
336                    SaleType__Rare cases   6229.5543   6591.435  0.945  0.345   -6708.355   19200.000          
337                            SaleType__WD  -8259.3197   5191.945 -1.591  0.112  -18500.000    1931.621          
338                             ScreenPorch     65.3563     16.306  4.008  0.000      33.350      97.362       ***
339                            TotRmsAbvGrd   1442.9043   5429.991  0.266  0.791   -9215.281   12100.000          
340                TotRmsAbvGrd_str__"10.0"   8582.9861  13700.000  0.627  0.531  -18300.000   35500.000          
341                TotRmsAbvGrd_str__"11.0"  11050.0000  19300.000  0.573  0.567  -26800.000   48900.000          
342                TotRmsAbvGrd_str__"12.0" -39060.0000  26400.000 -1.477  0.140  -91000.000   12800.000          
343                 TotRmsAbvGrd_str__"3.0"  -3255.0682  24800.000 -0.131  0.896  -52000.000   45500.000          
344                 TotRmsAbvGrd_str__"4.0"   2314.5620  18800.000  0.123  0.902  -34500.000   39100.000          
345                 TotRmsAbvGrd_str__"5.0"   3842.8260  13500.000  0.285  0.776  -22600.000   30300.000          
346                 TotRmsAbvGrd_str__"6.0"   6572.5656   8329.981  0.789  0.430   -9777.825   22900.000          
347                 TotRmsAbvGrd_str__"7.0"   5729.3691   3948.586  1.451  0.147   -2021.060   13500.000          
348                 TotRmsAbvGrd_str__"8.0"   2298.1803   4358.857  0.527  0.598   -6257.544   10900.000          
349                 TotRmsAbvGrd_str__"9.0"   1920.7105   8970.486  0.214  0.831  -15700.000   19500.000          
350                             TotalBsmtSF      4.6293      6.069  0.763  0.446      -7.283      16.542          
351                              WoodDeckSF      7.1014      7.973  0.891  0.373      -8.548      22.751          
352                               YearBuilt    377.6051    122.994  3.070  0.002     136.188     619.022        **
353                            YearRemodAdd     79.4149     75.849  1.047  0.295     -69.464     228.293          
354                                  YrSold   -428.1827    151.320 -2.830  0.005    -725.199    -131.166        **
355                    YrSold_str__"2006.0"   -947.4036   1826.616 -0.519  0.604   -4532.753    2637.946          
356                    YrSold_str__"2007.0"  -1712.1666   1737.426 -0.985  0.325   -5122.451    1698.117          
357                    YrSold_str__"2008.0"   -154.2332   1744.307 -0.088  0.930   -3578.022    3269.556          
358                    YrSold_str__"2009.0"     -8.8170   1736.092 -0.005  0.996   -3416.482    3398.848          
359                    YrSold_str__"2010.0"   2817.8087   2282.426  1.235  0.217   -1662.221    7297.838          

-------------------------

 --- Model statistic --- 

R-squared         : 0.925
Adj. R-squared    : 0.901
F-statistic       : 38
Prob (F-statistic): 0.0
No. Observations  : 1095
AIC               : 25536
Df Residuals      : 827
BIC               : 26876
RMSE (test)       : 32690

-------------------------

Maximum correlation between Reseduals and any data columns is 6.521258747279969e-13, with columns <LotArea>
Mean of train reseduals: 2.850851758498035e-08

 ------------------------------------- Random Forest -------------------------------------


-------------------------

RF model peramters:

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': True,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

-------------------------

-------------------------

--- Model statistic ---
R^2 (test) : 0.8402415243284254
R^2 (train): 0.9809782507599958
RMSE (test): 30828
oob score  : 0.859404130508701

-------------------------

Maximum correlation between Reseduals and any data columns is 0.3735543313645223, with columns <TotRmsAbvGrd_str__"11.0">